1 /*
2 * Copyright (C) 2009 The Guava Authors
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 package com.google.common.escape;
18
19 import static com.google.common.base.Preconditions.checkNotNull;
20
21 import com.google.common.annotations.Beta;
22 import com.google.common.annotations.GwtCompatible;
23
24 import java.util.Map;
25
26 /**
27 * A {@link CharEscaper} that uses an array to quickly look up replacement
28 * characters for a given {@code char} value. An additional safe range is
29 * provided that determines whether {@code char} values without specific
30 * replacements are to be considered safe and left unescaped or should be
31 * escaped in a general way.
32 *
33 * <p>A good example of usage of this class is for Java source code escaping
34 * where the replacement array contains information about special ASCII
35 * characters such as {@code \\t} and {@code \\n} while {@link #escapeUnsafe}
36 * is overridden to handle general escaping of the form {@code \\uxxxx}.
37 *
38 * <p>The size of the data structure used by {@link ArrayBasedCharEscaper} is
39 * proportional to the highest valued character that requires escaping.
40 * For example a replacement map containing the single character
41 * '{@code \}{@code u1000}' will require approximately 16K of memory. If you
42 * need to create multiple escaper instances that have the same character
43 * replacement mapping consider using {@link ArrayBasedEscaperMap}.
44 *
45 * @author Sven Mawson
46 * @author David Beaumont
47 * @since 15.0
48 */
49 @Beta
50 @GwtCompatible
51 public abstract class ArrayBasedCharEscaper extends CharEscaper {
52 // The replacement array (see ArrayBasedEscaperMap).
53 private final char[][] replacements;
54 // The number of elements in the replacement array.
55 private final int replacementsLength;
56 // The first character in the safe range.
57 private final char safeMin;
58 // The last character in the safe range.
59 private final char safeMax;
60
61 /**
62 * Creates a new ArrayBasedCharEscaper instance with the given replacement map
63 * and specified safe range. If {@code safeMax < safeMin} then no characters
64 * are considered safe.
65 *
66 * <p>If a character has no mapped replacement then it is checked against the
67 * safe range. If it lies outside that, then {@link #escapeUnsafe} is
68 * called, otherwise no escaping is performed.
69 *
70 * @param replacementMap a map of characters to their escaped representations
71 * @param safeMin the lowest character value in the safe range
72 * @param safeMax the highest character value in the safe range
73 */
74 protected ArrayBasedCharEscaper(Map<Character, String> replacementMap,
75 char safeMin, char safeMax) {
76
77 this(ArrayBasedEscaperMap.create(replacementMap), safeMin, safeMax);
78 }
79
80 /**
81 * Creates a new ArrayBasedCharEscaper instance with the given replacement map
82 * and specified safe range. If {@code safeMax < safeMin} then no characters
83 * are considered safe. This initializer is useful when explicit instances of
84 * ArrayBasedEscaperMap are used to allow the sharing of large replacement
85 * mappings.
86 *
87 * <p>If a character has no mapped replacement then it is checked against the
88 * safe range. If it lies outside that, then {@link #escapeUnsafe} is
89 * called, otherwise no escaping is performed.
90 *
91 * @param escaperMap the mapping of characters to be escaped
92 * @param safeMin the lowest character value in the safe range
93 * @param safeMax the highest character value in the safe range
94 */
95 protected ArrayBasedCharEscaper(ArrayBasedEscaperMap escaperMap,
96 char safeMin, char safeMax) {
97
98 checkNotNull(escaperMap); // GWT specific check (do not optimize)
99 this.replacements = escaperMap.getReplacementArray();
100 this.replacementsLength = replacements.length;
101 if (safeMax < safeMin) {
102 // If the safe range is empty, set the range limits to opposite extremes
103 // to ensure the first test of either value will (almost certainly) fail.
104 safeMax = Character.MIN_VALUE;
105 safeMin = Character.MAX_VALUE;
106 }
107 this.safeMin = safeMin;
108 this.safeMax = safeMax;
109 }
110
111 /*
112 * This is overridden to improve performance. Rough benchmarking shows that
113 * this almost doubles the speed when processing strings that do not require
114 * any escaping.
115 */
116 @Override
117 public final String escape(String s) {
118 checkNotNull(s); // GWT specific check (do not optimize).
119 for (int i = 0; i < s.length(); i++) {
120 char c = s.charAt(i);
121 if ((c < replacementsLength && replacements[c] != null) ||
122 c > safeMax || c < safeMin) {
123 return escapeSlow(s, i);
124 }
125 }
126 return s;
127 }
128
129 /**
130 * Escapes a single character using the replacement array and safe range
131 * values. If the given character does not have an explicit replacement and
132 * lies outside the safe range then {@link #escapeUnsafe} is called.
133 */
134 @Override protected final char[] escape(char c) {
135 if (c < replacementsLength) {
136 char[] chars = replacements[c];
137 if (chars != null) {
138 return chars;
139 }
140 }
141 if (c >= safeMin && c <= safeMax) {
142 return null;
143 }
144 return escapeUnsafe(c);
145 }
146
147 /**
148 * Escapes a {@code char} value that has no direct explicit value in the
149 * replacement array and lies outside the stated safe range. Subclasses should
150 * override this method to provide generalized escaping for characters.
151 *
152 * <p>Note that arrays returned by this method must not be modified once they
153 * have been returned. However it is acceptable to return the same array
154 * multiple times (even for different input characters).
155 *
156 * @param c the character to escape
157 * @return the replacement characters, or {@code null} if no escaping was
158 * required
159 */
160 // TODO(user,cpovirk): Rename this something better once refactoring done
161 protected abstract char[] escapeUnsafe(char c);
162 }